####Dan Hopkins
####Blogs Project
####R Code November 17 2006

###All words must be labeled as "WORD."
###for this code to work
###
###depends on nothing being alphabetically after "WORD"
###depends on abandon being the first word, zone the last

#condor_submit_util -i rcode1000N.R -f -N -n 1

###TRAIN SET SIZE
###number of rows drawn (without replacement) for each training sample
MM <- 1000

###The second positional argument of library() is `help`, not `lib.loc`,
###so the private library directory has to be passed by name; passed
###positionally it is ignored and only the default library paths are searched.
###The default paths are kept as a fallback so the script still runs where
###VA is installed in one of them.
library(VA, lib.loc = c("/nfs/fs1/home/D/dhopkins/.R", .libPaths()))

####NUMBER OF ITERATIONS
###number of train/estimate repetitions; one output row per repetition
n.samp <- 50

###load the saved workspace; the code below expects it to provide the
###data frame dta.BUSHf
load("/nfs/projects/p/poliblog/bushdata111106.Rdata")

###Word columns are recognised by the "WORD" prefix of their column name.
first4 <- substr(colnames(dta.BUSHf), start = 1, stop = 4)
nwords <- sum(first4 == "WORD")

###Everything below addresses the word columns as columns 1..nwords, so stop
###early if that layout does not hold (with nwords == 0 the old `1:nwords`
###index would silently have selected column 1).
if (nwords < 1 || !all(first4[seq_len(nwords)] == "WORD")) {
  stop("the \"WORD\" columns must be the leading columns of dta.BUSHf",
       call. = FALSE)
}

###names of the first and last word column, used to build the va() formula
sttxt <- colnames(dta.BUSHf)[1]
fntxt <- colnames(dta.BUSHf)[nwords]

###Column total divided by the number of rows for every word column
###(the share of rows using the word if the columns are 0/1 indicators --
###TODO confirm the coding of the word columns).
###colSums() avoids looping over the columns with apply(); drop = FALSE keeps
###the data-frame shape even when there is a single word column.
nn <- colSums(dta.BUSHf[, seq_len(nwords), drop = FALSE]) / nrow(dta.BUSHf)

###Words whose value exceeds .5 get weight zero; the remaining values are
###rescaled to sum to one and passed to va() as prob.wt.
nn2 <- nn
nn2[nn > .5] <- 0
wts <- nn2 / sum(nn2)

###Values of jointcode, in the column order used for actmat.
codes <- -2:4

###One row per iteration, one column per jointcode value:
###  resmat - vout1$est.CSMF returned by va()
###  tmat   - vout1$true.CSMF returned by va()
###  actmat - share of each jointcode value in the sampled training rows
resmat <- tmat <- actmat <- matrix(NA, n.samp, length(codes))

###The va() call is assembled as text so that the first and last word column
###names appear literally in the "first+...+last" formula.  The text does not
###depend on the iteration, so it is built and parsed once.
###NOTE(review): eval(parse()) is kept because it is not clear from this file
###whether va() accepts a formula object built at run time -- confirm against
###the VA documentation before replacing it.
txt <- paste0("vout1<-va(cbind(", sttxt, "+...+", fntxt,
              ")~jointcode,data=list(train,test),nsymp=45,n.subset=150,prob.wt=wts)")
va_call <- parse(text = txt)

###The second data set handed to va() is always the full data frame
###(it therefore includes the sampled training rows).
test <- dta.BUSHf
n_docs <- nrow(dta.BUSHf)

###NOTE(review): no set.seed() call, so the samples differ between runs.
for (i in seq_len(n.samp)) {
  ###draw MM distinct row indices for this iteration's training set
  rs <- sample.int(n_docs, MM, replace = FALSE)
  train <- dta.BUSHf[rs, ]

  ###runs va() and assigns its result to vout1
  eval(va_call)
  resmat[i, ] <- vout1$est.CSMF
  tmat[i, ] <- vout1$true.CSMF

  ###proportion of training rows carrying each jointcode value
  actmat[i, ] <- round(
    vapply(codes, function(code) mean(train$jointcode == code), numeric(1)),
    digits = 6
  )
}
###Write each result matrix to its own file; the training-set size MM is
###embedded in the file name.

###estimates returned by va()
txt1 <- paste0("/nfs/fs1/home/D/dhopkins/condor/resmatN", MM, ".dat")
write.table(resmat, file = txt1)

###true values reported by va()
txt2 <- paste0("/nfs/fs1/home/D/dhopkins/condor/tmatN", MM, ".dat")
write.table(tmat, file = txt2)

###jointcode shares observed in each training sample
txt3 <- paste0("/nfs/fs1/home/D/dhopkins/condor/actmatN", MM, ".dat")
write.table(actmat, file = txt3)

